import cv2
import numpy as np
import matplotlib
matplotlib.use("QtAgg")
import matplotlib.pyplot as plt
def distance(p1, p2):
    """Return the Euclidean distance between two points (numpy arrays)."""
    diff = p1 - p2
    return np.sqrt(np.dot(diff, diff))
def rectify_scene(angled_file, reference_file, title_label, save_name):
    """Rectify a planar region of an angled photo via a 4-point homography.

    The user clicks 4 corners (clockwise from top-left) of a planar region in
    the angled image; that region is warped to a fronto-parallel rectangle
    whose width/height are estimated from the clicked edge lengths. A 3-panel
    figure (original | rectified | reference) is saved to ``save_name``.

    Args:
        angled_file: path to the perspective-distorted input photo.
        reference_file: path to the flat (straight-on) photo, shown for comparison only.
        title_label: suptitle for the saved comparison figure.
        save_name: output PNG path.

    Returns:
        The rectified image as an RGB numpy array of shape (h, w, 3).

    Raises:
        FileNotFoundError: if either image fails to load.
        RuntimeError: if too few points were clicked or the homography
            cannot be estimated (e.g. collinear clicks).
    """
    # Load angled image (OpenCV loads BGR; convert to RGB for display/warping)
    angled_img = cv2.imread(angled_file)
    if angled_img is None:
        raise FileNotFoundError(f"Could not load {angled_file}")
    angled_rgb = cv2.cvtColor(angled_img, cv2.COLOR_BGR2RGB)
    # Load reference image
    ref_img = cv2.imread(reference_file)
    if ref_img is None:
        raise FileNotFoundError(f"Could not load {reference_file}")
    ref_rgb = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)
    # Click 4 points on angled image (clockwise from top-left assumed below)
    plt.imshow(angled_rgb)
    plt.title(f"Select 4 points on {angled_file}")
    pts = plt.ginput(4, timeout=0)
    plt.close()
    # ginput can return fewer points if the window is closed early
    if len(pts) != 4:
        raise RuntimeError("Exactly 4 points are required for rectification.")
    src_pts = np.float32(pts)
    # Target rectangle: width/height taken as the longer of each pair of
    # opposite clicked edges, so the result is never cropped short.
    w = int(max(distance(src_pts[0], src_pts[1]), distance(src_pts[2], src_pts[3])))
    h = int(max(distance(src_pts[0], src_pts[3]), distance(src_pts[1], src_pts[2])))
    dst_pts = np.float32([
        [0, 0],
        [w - 1, 0],
        [w - 1, h - 1],
        [0, h - 1]
    ])
    # Compute homography and warp; findHomography returns None for
    # degenerate (e.g. collinear) point sets, so fail loudly instead of
    # crashing inside warpPerspective.
    H, _ = cv2.findHomography(src_pts, dst_pts)
    if H is None:
        raise RuntimeError("Homography estimation failed; re-select non-collinear points.")
    rectified = cv2.warpPerspective(angled_rgb, H, (w, h))
    # Visualization: Original | Rectified | Reference
    fig, ax = plt.subplots(1, 3, figsize=(18, 7))
    ax[0].imshow(angled_rgb)
    ax[0].set_title("Original (Angled View)")
    ax[0].scatter(src_pts[:, 0], src_pts[:, 1], c='r', s=40)
    ax[0].axis('off')
    ax[1].imshow(rectified)
    ax[1].set_title("Rectified Result")
    ax[1].axis('off')
    ax[2].imshow(ref_rgb)
    ax[2].set_title("Reference (Flat View)")
    ax[2].axis('off')
    plt.suptitle(title_label, fontsize=16, color='black', fontweight='bold')
    plt.tight_layout()
    plt.subplots_adjust(wspace=0.05)
    # Save figure as PNG
    fig.savefig(save_name, bbox_inches='tight', dpi=200)
    plt.close(fig)
    print(f"[✓] Saved: {save_name}")
    return rectified
# Rectifications: (angled image, flat reference, figure title, output file)
RECTIFY_JOBS = [
    ("scene_1-b.jpg", "scene_1-a.jpg", "Scene 1-B → Rectified to 1-A", "rectified_1-b.png"),
    ("scene_1-c.jpg", "scene_1-a.jpg", "Scene 1-C → Rectified to 1-A", "rectified_1-c.png"),
    ("scene_2-a.jpg", "scene_2-b.jpg", "Scene 2-A → Rectified to 2-B", "rectified_2-a.png"),
]
for angled, reference, title, out_name in RECTIFY_JOBS:
    rectify_scene(angled, reference, title, out_name)
from IPython.display import Image, display

# Show the three saved rectification figures inline, in order 1-b, 1-c, 2-a.
for png in ("rectified_1-b.png", "rectified_1-c.png", "rectified_2-a.png"):
    display(Image(filename=png))
The homography transformation was applied to remove perspective distortion from the angled images. This made each tilted surface appear fronto-parallel, as if the photo had been taken straight on. For each scene, four corner points were manually selected on the planar region of interest, and a 3×3 homography matrix H was estimated using the Direct Linear Transform (DLT) algorithm. Warping the image with H (cv2.warpPerspective, which internally samples source pixels via the inverse mapping H⁻¹) produced the rectified image, restoring the surface’s true rectangular proportions.
Homography works by defining a precise mathematical mapping between two planar views under perspective projection. Because the poster and sign surfaces in these test scenes are flat, their perspective distortions can be corrected perfectly. This technique is foundational in computer vision tasks such as document scanning, augmented-reality overlays, planar surface alignment, and texture mapping where accurate geometric consistency between real and virtual planes is critical.
import cv2
import numpy as np
import matplotlib
matplotlib.use("QtAgg")
import matplotlib.pyplot as plt
def apply_overlay(base_file, overlay_file, save_name):
    """Composite an overlay image onto a clicked planar surface (AR effect).

    The user clicks the 4 corners (clockwise from top-left) of a planar
    surface in the base photo; the overlay is warped onto that quadrilateral
    and composited. A 3-panel figure (original | overlay | AR effect) is
    saved to ``save_name``.

    Args:
        base_file: path to the scene photo containing the target surface.
        overlay_file: path to the image to project onto the surface.
        save_name: output PNG path.

    Raises:
        FileNotFoundError: if either image fails to load.
        RuntimeError: if the homography cannot be estimated from the clicks.
    """
    # Load base and overlay images
    base = cv2.imread(base_file)
    overlay = cv2.imread(overlay_file)
    if base is None or overlay is None:
        raise FileNotFoundError("Could not load base or overlay image.")
    base_rgb = cv2.cvtColor(base, cv2.COLOR_BGR2RGB)
    h_ov, w_ov, _ = overlay.shape
    # Select 4 points
    plt.imshow(base_rgb)
    plt.title("Select 4 corners of the surface (clockwise from top-left)")
    pts = plt.ginput(4, timeout=0)
    plt.close()
    src_pts = np.float32(pts)
    print("Selected points:\n", src_pts)
    # Destination = corners of overlay
    dst_pts = np.float32([
        [0, 0],
        [w_ov - 1, 0],
        [w_ov - 1, h_ov - 1],
        [0, h_ov - 1]
    ])
    # Compute homography (overlay corners -> clicked quad) and warp overlay.
    # findHomography returns None for degenerate clicks; fail loudly.
    H, _ = cv2.findHomography(dst_pts, src_pts)
    if H is None:
        raise RuntimeError("Homography estimation failed; re-select the corners.")
    warped = cv2.warpPerspective(overlay, H, (base.shape[1], base.shape[0]))
    # Build the composite mask by warping a solid-white image of the overlay's
    # size. Thresholding the warped overlay itself (warped > 0) is a bug: it
    # is a per-channel test, so black pixels in the overlay punch holes in the
    # composite and zero channels leak base-image colour through. The warped
    # white mask covers the full quadrilateral instead.
    white = np.full((h_ov, w_ov), 255, dtype=np.uint8)
    mask = cv2.warpPerspective(white, H, (base.shape[1], base.shape[0])) > 0
    result = base.copy()
    result[mask] = warped[mask]
    # Layout: Original | Overlay | AR Effect
    fig, ax = plt.subplots(1, 3, figsize=(18, 8))
    ax[0].imshow(cv2.cvtColor(base, cv2.COLOR_BGR2RGB))
    ax[0].scatter(src_pts[:, 0], src_pts[:, 1], c='r', s=40)
    ax[0].set_title("Original")
    ax[0].axis("off")
    ax[1].imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
    ax[1].set_title("Overlay")
    ax[1].axis("off")
    ax[2].imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    ax[2].set_title("AR Effect")
    ax[2].axis("off")
    plt.suptitle("Augmented Reality Effect via Homography", fontsize=16, color="black", fontweight='bold')
    plt.tight_layout()
    plt.subplots_adjust(wspace=0.05)
    # Save output
    fig.savefig(save_name, bbox_inches="tight", dpi=200)
    plt.close(fig)
    print(f"[✓] Saved: {save_name}")
# AR Effect: project overlay.jpg onto the clicked surface in homography.jpg
apply_overlay("homography.jpg", "overlay.jpg", "part2.png")
# Captured cell output from the run above:
# Selected points: [[ 174.36932 631.8523 ] [ 770.36365 141.71022] [ 818.6875 1579.9204 ] [ 172.06818 1248.5568 ]]
# [✓] Saved: part2.png
# Image: show the saved AR figure inline (display/Image come from the earlier IPython.display import)
display(Image(filename="part2.png"))
For this part, I used a poster hanging on my wall to create an augmented-reality effect through homography. After manually selecting four corner points on the new scene, a second image was warped to match the poster’s perspective so that it appeared naturally anchored within the photo. This demonstrates how a planar homography can accurately project digital content onto real-world surfaces, creating the illusion that the overlay is part of the environment.
What worked best was how precisely the transformation aligned the overlay once the corner points were refined. The edges followed the scene’s perspective almost perfectly, and the illusion of depth felt convincing. The main challenges were lighting differences that made the overlay stand out and coordinate drift when the clicked points weren’t perfectly consistent. Balancing geometric accuracy and visual realism required several refinements, but the result clearly shows how a single homography matrix can fuse digital imagery onto a real surface as if it were physically printed there.
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import Delaunay
from skimage.transform import PiecewiseAffineTransform, warp
from scipy.interpolate import Rbf
def get_points(img, n_points=10, title="Select corresponding points"):
    """Collect n_points clicked (x, y) positions on img; return a float32 array."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.title(f"{title} ({n_points} points)")
    clicked = plt.ginput(n_points, timeout=0)
    plt.close()
    return np.asarray(clicked, dtype=np.float32)
def thin_plate_spline_warp(src_img, src_pts, dst_pts):
    """Warp src_img so landmarks at src_pts move to dst_pts (thin-plate spline).

    Bug fix: cv2.remap requires the *inverse* mapping — for every destination
    pixel it asks where to sample in the source image. The RBFs must therefore
    be fitted from the destination landmarks to the source landmarks
    (dst -> src), not src -> dst as before, otherwise landmarks move in the
    opposite direction to the intended warp.

    Args:
        src_img: source image (H x W [x C] numpy array).
        src_pts: (N, 2) landmark coordinates in the source image.
        dst_pts: (N, 2) corresponding target coordinates.

    Returns:
        The warped image, same shape and dtype as src_img.
    """
    h, w = src_img.shape[:2]
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    flat_x, flat_y = grid_x.flatten(), grid_y.flatten()
    # Inverse mapping: evaluated at destination coords, returns source coords.
    rbf_x = Rbf(dst_pts[:, 0], dst_pts[:, 1], src_pts[:, 0], function='thin_plate')
    rbf_y = Rbf(dst_pts[:, 0], dst_pts[:, 1], src_pts[:, 1], function='thin_plate')
    map_x = rbf_x(flat_x, flat_y).reshape(h, w).astype(np.float32)
    map_y = rbf_y(flat_x, flat_y).reshape(h, w).astype(np.float32)
    warped = cv2.remap(src_img, map_x, map_y, interpolation=cv2.INTER_CUBIC)
    return warped
def mesh_warp(src_img, src_pts, dst_pts):
    """Warp src_img so landmarks at src_pts move to dst_pts (piecewise affine).

    Bug fix: skimage.transform.warp treats the given transform as an *inverse*
    map (output coords -> input coords), so the transform must be estimated
    from dst_pts to src_pts; estimating src -> dst warps in the opposite
    direction to the intended motion. The warp operates per channel, so the
    BGR array is warped directly — the previous BGR->RGB->BGR round trip was
    a no-op and has been removed.

    Args:
        src_img: source image (uint8 BGR numpy array).
        src_pts: (N, 2) landmark coordinates in the source image.
        dst_pts: (N, 2) corresponding target coordinates.

    Returns:
        The warped image as a uint8 BGR array (warp yields floats in [0, 1],
        rescaled back to 0-255).
    """
    tform = PiecewiseAffineTransform()
    # Inverse map: output (destination) coordinates -> source coordinates.
    tform.estimate(dst_pts, src_pts)
    warped_float = warp(src_img, tform, output_shape=src_img.shape[:2])
    return (warped_float * 255).astype(np.uint8)
def show_comparison(src_img, mesh_img, tps_img, dst_img, title, save_name):
    """Render Source | Mesh warp | TPS warp | Target side by side, save, show."""
    panels = [
        ("Source", src_img),
        ("Triangular Mesh Warp", mesh_img),
        ("Thin-Plate Spline Warp", tps_img),
        ("Target", dst_img),
    ]
    fig, axes = plt.subplots(1, 4, figsize=(20, 6))
    for axis, (label, image) in zip(axes, panels):
        axis.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        axis.set_title(label)
        axis.axis("off")
    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig(save_name, bbox_inches="tight", dpi=300)
    plt.show()
# Warp tests on the handwritten digits 3 and 7: for each digit, click
# matching landmarks on both photos, then compare the mesh and TPS warps
# of image a against image b.
for digit in (3, 7):
    img_a = cv2.imread(f"digit_{digit}-a.jpg")
    img_b = cv2.imread(f"digit_{digit}-b.jpg")
    if img_a is None or img_b is None:
        raise FileNotFoundError(f"digit_{digit}-a.jpg or digit_{digit}-b.jpg missing.")
    print(f"🖱️ Select corresponding points for Digit {digit} (source image)...")
    pts_src = get_points(img_a, n_points=10, title=f"Digit {digit} Source")
    print(f"🖱️ Now select the same points for Digit {digit} (target image)...")
    pts_dst = get_points(img_b, n_points=10, title=f"Digit {digit} Target")
    mesh_result = mesh_warp(img_a, pts_src, pts_dst)
    tps_result = thin_plate_spline_warp(img_a, pts_src, pts_dst)
    show_comparison(img_a, mesh_result, tps_result, img_b,
                    f"Digit {digit} Warping Comparison",
                    f"digit{digit}_comparison.png")
# Custom image: face photo with manually chosen landmarks
face = cv2.imread("face.jpeg")
if face is None:
    raise FileNotFoundError("face.jpeg not found.")
# Landmarks on the original photo, grouped by facial region (order matters:
# groups are concatenated in insertion order to form the control-point array).
_landmark_groups = {
    "forehead": [(400, 620), (580, 610), (740, 625)],
    "left_eyebrow": [(340, 770), (410, 750), (470, 760)],
    "right_eyebrow": [(670, 750), (730, 755), (790, 765)],
    "left_eye": [(370, 855), (475, 855)],
    "right_eye": [(660, 850), (775, 850)],
    "nose": [(570, 910), (485, 1020), (660, 1020), (570, 1020)],
    "mouth": [(450, 1190), (575, 1175), (710, 1185), (575, 1200)],
    "chin": [(500, 1340), (575, 1370), (650, 1340)],
    "jaw": [(330, 1220), (830, 1220)],
}
src_pts = np.array(
    [point for region in _landmark_groups.values() for point in region],
    dtype=np.float32,
)
# Identity deformation: destination equals source, so both warps should
# reproduce the input — a sanity check of the two pipelines.
dst_pts = src_pts.copy()
mesh_face = mesh_warp(face, src_pts, dst_pts)
tps_face = thin_plate_spline_warp(face, src_pts, dst_pts)
show_comparison(face, mesh_face, tps_face, face, "Face Warping Comparison", "face_comparison.png")
import matplotlib.pyplot as plt
import cv2

# Stack the three saved comparison figures vertically into one summary figure.
COMPARISONS = [
    ("Digit 3 Warping Comparison", "digit3_comparison.png"),
    ("Digit 7 Warping Comparison", "digit7_comparison.png"),
    ("Face Warping Comparison", "face_comparison.png"),
]
fig, axes = plt.subplots(len(COMPARISONS), 1, figsize=(12, 18))
for axis, (caption, path) in zip(axes, COMPARISONS):
    loaded = cv2.imread(path)
    if loaded is None:
        # Missing file: show an inline error panel instead of crashing the summary.
        axis.text(0.5, 0.5, f"{path} not found", ha='center', va='center', color='red', fontsize=12)
        axis.axis('off')
        continue
    axis.imshow(cv2.cvtColor(loaded, cv2.COLOR_BGR2RGB))
    axis.set_title(caption, fontsize=13, fontweight='bold', color='black', pad=12)
    axis.axis('off')
plt.tight_layout()
plt.show()
Two image-warping techniques were applied to the same set of facial landmarks to evaluate how each method handles localized and global deformations. Both methods used identical control points but approached the deformation process differently.
Triangular Mesh Warping applies local affine transformations within each triangle formed by the control points. This gives it fine-grained control over specific regions of the image, making it fast and predictable. However, because each triangle is transformed independently, visible seams or discontinuities can appear where adjacent triangles fail to align perfectly. This effect is most noticeable around sharp features such as the nose or jawline, where abrupt local differences can make the warp look slightly segmented or “cut.” Despite that limitation, it remains efficient and performs well in real-time systems where localized edits or animations are needed.
Thin-Plate Spline (TPS), by contrast, treats the entire deformation as a smooth, continuous function by minimizing bending energy across the image. Rather than transforming triangles individually, TPS models the deformation globally, ensuring that motion at one landmark gradually influences nearby areas. This produces smoother and more natural-looking results, especially for organic surfaces such as human faces. The trade-off is that TPS is computationally more expensive and can occasionally over-smooth regions that require sharper control, like corners or edges.
In practice, the triangular mesh warp tends to preserve structure and sharpness at the cost of continuity, whereas TPS excels in producing visually coherent and realistic transformations across the entire surface. For applications like facial animation or shape morphing, TPS is generally preferred for its smoothness and fluid motion, while the triangular mesh approach remains a strong choice for interactive systems, AR filters, or localized adjustments that don’t require full-frame smoothing.
In this project, the difference between the two methods was clear: the triangular mesh warp introduced small discontinuities around high-contrast features, whereas the TPS deformation appeared smoother and more cohesive. TPS produced a more natural transformation that resembled real facial movement, while the triangular mesh felt stiffer but more controlled. Together, these results highlight the trade-off between computational speed and visual realism in non-rigid image warping.